/* NoX (NoC Simulator)
 *
 * Dept. of Computer Science & Engineering, Pennsylvania State University.
 * All Rights Reserved.
 *  
 * 1. License     
 * NoX is distributed free of charge for academic, educational, noncommercial 
 * research purposes as long as this notice in its entirety is preserved in
 * every file included in this package.
 * All commercial use of this program requires separate licence. Contact the
 * author for details.
 * 
 * 2. All the publications that used the simulation results generated by the 
 * NoX should notify the author of the publication information and put 
 * following reference.
 *
 *  http://www.cse.psu.edu/~dpark/nox/
 * 
 * 3. Modification of the source code is permitted and encouraged as long as 
 * it follows the terms described in this copyright notice.
 *
 * 4. The author is not responsible for any problems caused by possible errors
 * of the NoX package. Therefore, users should verify the simulation result
 * before using it in their publication.
 *
 * Dept. of Computer Science & Engineering, Pennsylvania State University.
 * Contact: dpark@cse.psu.edu 
 * 
 * 6. If problems are found with the NoX package, please send an email to the
 * author for discussion and correction.

*/

/* Update History
 *
 * Jan. 31, 2006  Version 1.0 released by Dongkook Park 
 *
 */

/* MAIN.C - main NoX simulation */

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <zlib.h>
#include <vector>
#include <algorithm>
#include "router.h"
#include "main.h"
#include "nic.h"
#include "link.h"
#include "app.h"
#include "router_common.h"
#include "sim_result.h"
#include "string.h"
#include "declarations.h"
#include "defines.h"

// TR_INTEG begin
// Value sim_clock is reset to at the start of nox_sim() in TRACE_READ builds;
// also the offset used when rebasing the first trace timestamp.
#define BASE_CLOCK 0
// Selects the trace record layout parsed in nox_sim():
//   non-zero: "src dst msg_len clock", zero: "src dst clock" (length = MSG_LEN).
#define READ_MSG_SIZE 0
// Granularity, in sim_clock units, of the deadlock check in nox_sim(): the
// check runs whenever sim_clock/DEADLOCK_PERIOD changes value.
#define DEADLOCK_PERIOD  50000 

// Per-node source queues, defined in another translation unit. nox_sim()
// samples their size when source_queue_logic_enabled is set.
extern std::vector<source_queue_t> source_queue[MAX_NODES]; 

// Integrated build only: pull in the host simulator's headers.
// INVALID is undefined before and after the includes and then forced to 1,
// so the value used by this file (e.g. sim_end_flag = INVALID in nox_sim())
// is always 1 regardless of what those headers define.
// NOTE(review): presumably processor.h/globals.h carry their own INVALID
// definition that would otherwise clash -- confirm against those headers.
#ifdef TR_INTEG
#undef INVALID
#include "processor.h"
#include "globals.h"

#undef INVALID
#define INVALID 1

#endif

// Stand-alone build only: pending (source, destination) injections that
// nox_sim() retries every cycle when RESP_RQ_TRAFFIC is enabled. Entries stay
// queued until app_to_nic_output_buf_stage() accepts them.
#ifndef TR_INTEG
#include <vector>
struct nic_entry_t {
  int src;  // node passed as the source argument on injection
  int dst;  // node passed as the destination argument on injection
};
std::vector<nic_entry_t> nic_queue;
#endif

// State for trace-driven injection; used by nox_sim() in TRACE_READ builds.
#ifdef TRACE_READ 
// Fields of the trace record most recently parsed from the trace file.
int trace_dst, trace_src, trace_msg_len;
// Timestamp of the current trace record, rebased by trace_clock_start after
// every read.
long long trace_clock=0;
// Offset derived from the first record's timestamp (first timestamp minus
// BASE_CLOCK); subtracted from every record's timestamp.
long long trace_clock_start=0;
// 1 until the first trace record has been read.
int first_read = 1;
// Set to 1 after an injection and cleared once the "Waiting ..." message has
// been printed, so that message is emitted once per pending record.
int first_print = 0;
// 1 when the current record has been consumed and the next one must be read;
// 0 while the current record is still waiting for its timestamp.
int last_read = 1;
// Set to 1 once the trace file is exhausted.
int trace_end = 0;
// gzip trace file handle, opened in main().
gzFile trace_fp;
// Line buffer for one trace record.
char buff[80];
// Last value of num_ejt_msg/10000 seen by the periodic reporting hook.
unsigned int last_print = 0;
// Un-rebased timestamp (trace_clock + trace_clock_start) of the last
// injected record.
long long global_clock=0;
// last_interval remembers global_clock/100 at the last interval boundary;
// num_interval counts boundaries where (global_clock/100) % 1000 == 0.
long long last_interval=0;
unsigned int num_interval=0;
// The three variables below are not referenced in this part of the file.
float last_network_latency=0.0;
unsigned int last_ejt_msg=0;
int last_equal_msg = -1;
#endif

// Priority table dumped by print_pri_array(); it is populated outside this
// part of the file.
std::vector<sort_t> priority_array;
// Repeated declaration of the per-node source queues (same as the one above).
extern std::vector<source_queue_t> source_queue[MAX_NODES]; 
void print_pri_array();

// Main simulation loop; defined below.
int nox_sim(long long);
// Not referenced in this part of the file.
bool first_rank_set = false;
// Per-node snapshot of router_occupancy[] taken at the previous statistics
// interval; nox_sim() prints the delta since this snapshot.
double last_rp[MAX_NODES] = {0};

//TR_INTEG end

#ifndef TR_INTEG
/* Stand-alone entry point (only compiled when TR_INTEG is not defined).
 *
 * TRACE_READ builds: argv[1] names the gzip trace file; the remaining
 * arguments are handed to sim_init() as if the trace name were argv[0].
 * Other builds: all arguments go to sim_init() unchanged.
 *
 * Runs the simulation up to clock 1000000000 and returns 0. Exits with
 * status 1 if the trace file argument is missing or cannot be opened.
 */
int main(int argc, char *argv[])
{

  // Initialize simulator settings.
#ifdef TRACE_READ 
  // Without this check argv[1] would be read past the argument list.
  if(argc < 2)
  {
    printf("Usage: %s <trace file> [simulator options]\n", (argc > 0) ? argv[0] : "nox");
    exit(1);
  }
  trace_fp = gzopen(argv[1], "r");
  if(trace_fp == NULL)
  {
    printf("Unable to open trace file!!!\n");
    exit(1);
  }
  sim_init(argc-1, &(argv[1]));
#else  
  sim_init(argc, argv);
#endif

  nox_sim(1000000000);

#ifdef TRACE_READ
  // The trace is only consumed inside nox_sim(); release the handle.
  gzclose(trace_fp);
#endif
  return 0;
}
#endif

int nox_sim(long long next_clock_event)
{
#ifdef TR_INTEG
  if(DEBUG)
    printf("***nox clock %lld\n",sim_clock);

  static  long long last_sim_clock=0;
  static  long long repeated_nox_calls=0;
  if(last_sim_clock == sim_clock && sim_clock)
    repeated_nox_calls++;
  else
    last_sim_clock = sim_clock;

#endif

  static double last_cycles=0, last_packets=0, last_delay=0;
#ifdef TRACE_READ
  sim_clock = BASE_CLOCK;
#endif    

  sim_end_flag = INVALID;
#if TR_INTEG
  icn_event_flag = false;
#endif
  static bool exit_sim = false;
  while(sim_end_flag != VALID && sim_clock < next_clock_event/* - CLOCK_FREQ_SCALE*/)
  {

    // Record the cycle when the simulator is warmed up.
    if(num_inj_msg > WARMUP_MSG && warmup_cycle == 0)
      warmup_cycle = sim_clock-sim_start_clock;


    // -------------------------------------------------------------
    // 1. Message injection step.
    // -------------------------------------------------------------
    // TR_INTEG begin
#ifdef TR_INTEG
    // -------------------------------------------------------------
    // Inject Requests from NIC queue filled by pepsi
    // -------------------------------------------------------------
    for(int node = 0; node < NUM_PE; node++)
      for(int n = 0; n < NUM_PE; n++)
      {
        while(!nic_queue[node][n].empty())
        {
          nic_entry_t temp; 
          temp = nic_queue[node][n].back();
          //printf("%d -> %d (%d)\n",temp.src, temp.dst, temp.msg_size);
          //if(1)//temp.clock <= sim_clock/CLOCK_FREQ_SCALE)    
          {
            if(!app_to_nic_output_buf_stage(temp.src, temp.dst, NO,temp.trid, temp.msg_size))
              break;
            nic_queue[node][n].pop_back();
            total_csim_nic_qdelay += sim_clock/CLOCK_FREQ_SCALE - temp.clock;
          }
          /*else
            {
            printf("sim_clock less than packet clock! sim_clock:%lld packet clock:%lld\n",sim_clock,temp.clock);
            break;
            }*/
        }// while
      }//for node

    if(num_inj_msg == num_ejt_msg)
    {
      sim_end_flag = VALID;
      if(next_clock_event != (global_clock + INFINITE_CYCLE)*CLOCK_FREQ_SCALE)
        sim_clock = CLOCK_FREQ_SCALE*(next_clock_event/CLOCK_FREQ_SCALE);
      else 
        if(sim_clock < global_clock)
          sim_clock = CLOCK_FREQ_SCALE*(global_clock/CLOCK_FREQ_SCALE);
      continue;
    }
    //Pepsi Inject end
#else
#ifdef TRACE_READ
    // -------------------------------------------------------------
    // Inject Requests from Traces
    // -------------------------------------------------------------
    if(trace_end && num_ejt_msg == num_inj_msg) 
      sim_end_flag = VALID; // End simulation. 

    if(last_read == 1 && !trace_end)
    {
      if(!gzeof(trace_fp))
      {
        gzgets(trace_fp, buff, 80);
#if READ_MSG_SIZE
        sscanf(buff,"%d %d %d %lld",&trace_src, &trace_dst, &trace_msg_len, &trace_clock) ;
#else
        sscanf(buff,"%d %d %lld",&trace_src, &trace_dst, &trace_clock) ;
        trace_msg_len = MSG_LEN;
#endif

        printf("read trace file transation : %s",buff);
        if(first_read)
        {
          trace_clock_start = trace_clock - BASE_CLOCK ;
          first_read = 0;
          printf("Trace clock starting at %lld\n", trace_clock_start);
        }
        trace_clock -= trace_clock_start;
      }
      else
      {
        trace_end = 1;
        printf("setting trace end here!\n");
        //getchar();
      }
    }
    if(!trace_end)
    {
      if(trace_clock <= sim_clock)
      {
        app_to_nic_output_buf_stage(trace_src, trace_dst, NO, -1, trace_msg_len); 
        printf("Injecting packet %d %d at clock %lld\n",trace_src, trace_dst, trace_clock);
        last_read = 1;
        first_print = 1;
        global_clock = trace_clock + trace_clock_start;
        if( (global_clock / 100) % 1000 == 0 && last_interval != global_clock/100)
        {
          num_interval++;
          last_interval = global_clock / 100;
        }
      } 
      else
      {
        if(first_print)
          printf("Waiting to Injecting packet %d %d at clock %lld now clock %d\n",trace_src, trace_dst, trace_clock, sim_clock);
        last_read = 0;
        first_print = 0;
      }
    }
    // low injection rate
    if(num_ejt_msg == num_inj_msg)
    {
      sim_clock+=CLOCK_FREQ_SCALE;
      continue;
    }
    // TRACE inject end
#else
    // -------------------------------------------------------------
    // Inject Requests for BiModal Traffic.
    // -------------------------------------------------------------
    if(RESP_RQ_TRAFFIC)
    {
      std::vector<nic_entry_t> temp_queue;
      for(int i=0; i< nic_queue.size(); i++)
        if(!app_to_nic_output_buf_stage(nic_queue[i].src,nic_queue[i].dst, NO, -1, MSG_LEN))
          temp_queue.push_back(nic_queue[i]);

      nic_queue.clear();
      for(int i=0; i< temp_queue.size() ; i++)
        nic_queue.push_back(temp_queue[i]);
    }

    if(!source_queue_logic_enabled)
    {
      if(num_inj_msg < MSG_TO_INJECT)
        // First two '-1's indicate that this function is called for 
        // general message injection not for error retransmission.
        if(RESP_RQ_TRAFFIC)
          app_to_nic_output_buf_stage(-1, -1, NO, -1,  1); 
        else
          app_to_nic_output_buf_stage(-1, -1, NO, -1, MSG_LEN);
    }
    else{

      // First two '-1's indicate that this function is called for 
      // general message injection not for error retransmission.
      if(RESP_RQ_TRAFFIC)
        app_to_nic_output_buf_stage(-1, -1, NO, -1, 1); 
      else
        app_to_nic_output_buf_stage(-1, -1, NO, -1, MSG_LEN);
    }

#endif
    //Resp Req Traffic or Synthetic end
#endif
    // TR_INTEG end

    // Move message in NIC output buf to router input buf.
    nic_output_buf_to_router_input_buf_stage();

    // -------------------------------------------------------------
    // 2. Router pipeline step.
    // -------------------------------------------------------------
    // Decodes flit and makes routing decision.
    stage1(); 
    // Crossbar arbitration.
    VA_stage();
    SA_stage();
    //stage2(); 
    // Data transfer through the crossbar. 
    stage3(); 

    //=======================================================================
    //Stall Time Fairness Support
    //=======================================================================
    //update_interference_cycles();
    // -------------------------------------------------------------
    // 3. Message transfer (through link) step.
    // -------------------------------------------------------------
    link_stage(); // This stage handles data transfer through the inter-router link.
    if(link2 >= 1)//concentrated == YES && hybrid_topology == NO)
      link_stage2(); // This stage handles data transfer through the inter-router link.
    if(link2 >= 2)//concentrated == YES && hybrid_topology == NO)
      link_stage3(); // This stage handles data transfer through the inter-router link.
    if(link2 >= 3)//concentrated == YES && hybrid_topology == NO)
      link_stage4(); // This stage handles data transfer through the inter-router link.

    // -------------------------------------------------------------
    // 4. Message ejection step.
    // -------------------------------------------------------------
    nic_input_buf_to_PE_stage(); // This stage handles message ejection in NIC.

    // -------------------------------------------------------------
    // 5. Update buffer and vc states step.
    // -------------------------------------------------------------
    update_mbox_count();    // This should be called before update_is_ready_flag().
    update_vc_info_stat();
    update_is_ready_flag(); // This should be called after update_mbox_count().
    //update_xbar_reserve_status();
    update_link_usage(); // Used in calculating link throughput.

    /*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*/
    //Source Queue logic
    if(source_queue_logic_enabled)
      for(int node=0; node<NUM_NODES; node++)
      {
        source_queue_delete(node);

        source_queue_size_total[node] += source_queue[node].size();
        source_queue_size_samples[node]++;

        if(!(sim_clock%nox_stat_interval))
          printf("%.0lf-",source_queue_size_total[node]/source_queue_size_samples[node]);
        //printf("%d-",source_queue[node].size());//,source_queue[node].size() ?source_queue[node][0].id :999); 
      }
    if(source_queue_logic_enabled)
      if(!(sim_clock%nox_stat_interval))
        printf("\n");

    // sim_clock++;
    sim_clock+=CLOCK_FREQ_SCALE;
    
    // -------------------------------------------------------------
    // 6. Deadlock and error checking step.
    // -------------------------------------------------------------
    static long long last_deadlock_chk_cycle = 0;
    static long long deadlock_chk_ejt_msg = -1;
    //exit in case of deadlock or other error!
    if(exit_sim)
    {
      for(int node=0; node<NUM_NODES; node++)
      {
        NUM_PC     = router_info[node].num_pc;
        NUM_NIC_PC = router_info[node].num_nic_pc;
        NUM_VC     = router_info[node].num_vc;
        for(int pc=0; pc<NUM_PC; pc++)
          for(int vc=0; vc<NUM_VC; vc++)
          {
            //if((pc < NUM_PC-NUM_NIC_PC) || (hybrid_topology == YES && node >= NUM_NODES/2 && pc==NUM_PC-1))
            if(verbose == YES)
            {
              printf("[%d][%d][%d] %s\n",node,pc,vc,vc_state[vc_info[node][pc][vc].vc_stat]);
              print_mbox(&(router_input_buf[node][pc][vc]));
            }
          }
      }
      exit(1);
    }

    if(sim_clock/DEADLOCK_PERIOD != last_deadlock_chk_cycle && sim_clock > 1000)
    {
      if(num_ejt_msg < num_inj_msg-10 && num_ejt_msg == deadlock_chk_ejt_msg)
      {
        printf("Deadlock reached !! num_ejt_msg:%lld deadlock_chk_ejt_msg:%lld last_deadlock_chk_cycle:%lld sim_clock:%lld\n",
            num_ejt_msg, deadlock_chk_ejt_msg, last_deadlock_chk_cycle, sim_clock);
        exit_sim = true;
        verbose = YES;
      }
      last_deadlock_chk_cycle = sim_clock/DEADLOCK_PERIOD;
      deadlock_chk_ejt_msg = num_ejt_msg;
    }
    // -------------------------------------------------------------
    // 7. Statistics step.
    // -------------------------------------------------------------

    if(verbose == YES)
    {
      printf("%lld-[%d/%d]***************************\n",(long long int)sim_clock, num_inj_msg, num_ejt_msg);
#ifdef TR_INTEG
      printf("csim_inj_msgs : %lld\n",csim_inj_msgs);
#endif
      fflush(stdout);
    }
    else
    {
      if(sim_clock % nox_stat_interval == 0)       
      {

        if(source_queue_logic_enabled)
        {
          printf("**** sim_clock:%lld\n",sim_clock);
          for(int r=0; r < NUM_ROWS; r++)
          {
            for(int c=0; c < NUM_COLS; c++)
            {
              printf("%5.0lf ",source_jobs_done[r*NUM_COLS + c]);
              //printf("%5.0lf|%8.0lf ",source_jobs_done[r*NUM_COLS + c],
              //  source_job_delay[r*NUM_COLS+c]/source_jobs_done[r*NUM_COLS + c]); 
            }
            printf("\n");
          }
          printf("Router Occupancy Matrix\n");
          for(int r=0; r < NUM_ROWS; r++)
          {
            for(int c=0; c < NUM_COLS; c++)
            {
              printf("%6.2lf ",(router_occupancy[r*NUM_COLS + c]-last_rp[r*NUM_COLS + c])/(double)nox_stat_interval); 
              last_rp[r*NUM_COLS + c] = router_occupancy[r*NUM_COLS + c];
            }
            printf("\n");
          }

        }
#if 1
        double cycles, packets, delay;
        cycles  = sim_clock - last_cycles - sim_start_clock;
        packets = num_ejt_msg - last_packets;
        delay   = total_packet_delay_cycle - last_delay;
        last_cycles = sim_clock;
        last_delay = total_packet_delay_cycle;
        last_packets = num_ejt_msg;
        //printf("%lld-[%d/%d/%.2f/q:%.2f/%2.2f%%][L:%d/R:%d/A:%d][%d/%d][%lld/%lld][%.2lf][%.0lf/%.2lf%%/%.2lf]\n", (long long int)sim_clock, num_ejt_msg,            num_inj_msg, total_packet_delay_cycle / (double)(num_ejt_msg),            (float)total_queueing_latency/(float)(num_ejt_flit/*msg*MSG_LEN*/),             (float)(num_inj_msg*100)/(float)(NUM_PE*(sim_clock-sim_start_clock)),            num_link_err, num_routing_err, num_swarbiter_err, num_e2e_nack, num_e2e_retrans,             (long long)num_ejt_flit, (long long)num_inj_flit, msg_combine_candidate_ratio,            packets,100*(packets/(NUM_PE*cycles)),delay/packets);
#endif

#ifndef TR_INTEG 
        if(RESP_RQ_TRAFFIC)
          printf("nic queue size : %d control msgs :%d data msgs :%d\n", nic_queue.size(), control_ejt_msg, data_ejt_msg);
        else
#endif
         // printf("control msgs :%d data msgs :%d\n", control_ejt_msg, data_ejt_msg);
      }
      fflush(stdout);
#ifdef TRACE_READ
      if(num_ejt_msg % 10000 == 0 && num_ejt_msg && last_print != num_ejt_msg / 10000)
      {
        //sim_result();
        last_print = num_ejt_msg / 10000;
      }
#else
#ifndef TR_INTEG
      if(sim_clock % (100*nox_stat_interval) == 0)
        sim_result();
#endif
#endif

    }// else not verbose interval stats

    // -------------------------------------------------------------
    // 8. End of clock cycle
    // -------------------------------------------------------------

  }// while


  // -------------------------------------------------------------
  // Final Results
  // -------------------------------------------------------------
#ifndef TR_INTEG
  // Simulation finished. Print out the simulation result.
  sim_result();
#endif
}

/* Commit the per-cycle message counts of every buffer in the network.
 *
 * For each router the globals NUM_PC / NUM_NIC_PC / NUM_VC are loaded from
 * router_info[], then update_cnt() is applied to the router input, crossbar,
 * link (non-NIC ports only) and NIC buffers. Along the way the function
 *   - aborts with exit(1) if ATOMIC_BUFFER is set and an input buffer fails
 *     is_mbox_atomic(),
 *   - reports input-buffer overflow when verbose == YES,
 *   - accumulates total_buf_usage[] once sim_clock has passed warmup_cycle.
 */
void update_mbox_count()
{
  for(int n=0; n<NUM_NODES; n++)
  {
    NUM_PC     = router_info[n].num_pc;
    NUM_NIC_PC = router_info[n].num_nic_pc;
    NUM_VC     = router_info[n].num_vc;

    for(int port=0; port<NUM_PC; port++)
    {
      for(int ch=0; ch<NUM_VC; ch++)
      {
        update_cnt(&(router_input_buf[n][port][ch]));
        update_cnt(&(xbar_buf        [n][port][ch]));

        //check remove later
        //if(pc < NUM_PC-NUM_NIC_PC)
        if(ATOMIC_BUFFER && !is_mbox_atomic(&(router_input_buf[n][port][ch])))
        {
          printf("num packets[%d][%d][%d]: %d\n",n,port,ch,router_input_buf[n][port][ch].num_msg);
          print_mbox(&(router_input_buf[n][port][ch]));
          exit(1);
        }

        // Ports below NUM_PC-NUM_NIC_PC are the inter-router ones; only they
        // own link buffers.
        const bool link_port = (port < NUM_PC-NUM_NIC_PC);
        if(link_port)
        {
          update_cnt(&(link_buf[n][port][ch]));
          update_cnt(&(link_buf2[n][port][ch]));
          update_cnt(&(link_buf3[n][port][ch]));
          update_cnt(&(link_buf4[n][port][ch]));
        }

        // LOCAL routers use a buffer depth scaled by HYB_VC_RATIO; the global
        // is scaled for the overflow check and restored right after it.
        const bool scale_depth = link_port && (router_info[n].type == LOCAL);
        if(scale_depth)
          ROUTER_INPUT_BUF_SIZE = ROUTER_INPUT_BUF_SIZE*HYB_VC_RATIO;

        // Check for the buffer overflow. Buffer overflow should not happen.
        if(verbose == YES &&
           msg_cnt(&router_input_buf[n][port][ch]) > ROUTER_INPUT_BUF_SIZE + link2 + 1)
        {
          printf("BAD!! Buffer Overflow at [%d,%d,%d,%d] bd:%d\n", n,port,ch,msg_cnt(&router_input_buf[n][port][ch]),ROUTER_INPUT_BUF_SIZE);
          print_mbox(&(router_input_buf[n][port][ch]));
        }

        if(scale_depth)
          ROUTER_INPUT_BUF_SIZE = ROUTER_INPUT_BUF_SIZE/HYB_VC_RATIO;

        // For statistic. Necessary for calculating total buffer utilization.
        if(sim_clock > warmup_cycle)
          total_buf_usage[n] += (msg_cnt(&(router_input_buf[n][port][ch])));
      }
    }

    // NIC-side buffers.
    for(int port=0; port<NUM_NIC_PC; port++)
    {
      for(int ch=0; ch<NUM_NIC_VC; ch++)
      {
        update_cnt(&(nic_input_buf [n][port][ch]));
        update_cnt(&(nic_output_buf[n][port][ch]));
      }
    }
  }
}

/*void update_interference_cycles()
  {
  int node, pc, vc;

  for(node=0; node<NUM_NODES; node++)
  {
  NUM_PC     = router_info[node].num_pc;
  NUM_NIC_PC = router_info[node].num_nic_pc;
  NUM_VC     = router_info[node].num_vc;
  for(pc=0; pc<NUM_PC; pc++)
  for(vc=0; vc<NUM_VC; vc++)
  {
//if(node == 5 && pc == 4 && vc == 6)
//printf("[%d][%d][%d] win id:%d\n",node,pc,vc,SA_win_id[node][pc][vc]);
if(SA_win_id[node][pc][vc] > -1)
{
mbox_update_icycles(&(router_input_buf[node][pc][vc]),SA_win_id[node][pc][vc], 1, 1);
if(pc < NUM_NIC_PC)
mbox_update_icycles(&(nic_output_buf[node][pc][vc]),SA_win_id[node][pc][vc], 1, 2);
}
if(SA_win_id[node][pc][vc] == -2)
{
mbox_update_icycles(&(router_input_buf[node][pc][vc]),SA_win_id[node][pc][vc], 2, 1);
if(pc < NUM_NIC_PC)
mbox_update_icycles(&(nic_output_buf[node][pc][vc]),SA_win_id[node][pc][vc], 2, 2);

}
}
}
}*/

/* Promote every virtual channel's temporary state to its committed state.
 *
 * States written as *_TMP during the current cycle are turned into the
 * corresponding final state here. NUM_PC / NUM_NIC_PC / NUM_VC are loaded
 * from router_info[] for each router before its channels are visited.
 * States without a mapping below are left untouched.
 */
void update_vc_info_stat()
{
  for(int n=0; n<NUM_NODES; n++)
  {
    NUM_PC     = router_info[n].num_pc;
    NUM_NIC_PC = router_info[n].num_nic_pc;
    NUM_VC     = router_info[n].num_vc;

    for(int port=0; port<NUM_PC; port++)
    {
      for(int ch=0; ch<NUM_VC; ch++)
      {
        switch(vc_info[n][port][ch].vc_stat)
        {
          case VC_IDLE_TMP:
            vc_info[n][port][ch].vc_stat = VC_IDLE;
            break;

          case VC_ROUTING_TMP:
            vc_info[n][port][ch].vc_stat = VC_ROUTING;
            break;

          case VC_VA_DONE_TMP:
            vc_info[n][port][ch].vc_stat = VC_VA_DONE;
            break;

          // Not used currently: on a bus switch this state passes through
          // VC_SA_DONE_TMP3 before reaching VC_SA_DONE.
          case VC_SA_DONE_TMP2:
            if(node_switch[n] == BUS_SWITCH)
              vc_info[n][port][ch].vc_stat = VC_SA_DONE_TMP3;
            else
              vc_info[n][port][ch].vc_stat = VC_SA_DONE;
            break;

          // For multi cycle SA stage (Needed for Hybrid): all of these
          // complete switch allocation at the end of this cycle.
          case VC_SA_DONE_TMP:
          case VC_SA_DONE_TMP1:
          case VC_SA_DONE_TMP3:
            vc_info[n][port][ch].vc_stat = VC_SA_DONE;
            break;

          default:
            break;
        }
      }
    }
  }// End of for nodexpcxvc
}

void print_vc_state(int n)
{
  int node, pc, vc;
  int r,c;

  for(pc=0; pc<NUM_PC; pc++)
  {
    for(vc=0; vc<NUM_VC; vc++)
      printf("%s-", vc_state[vc_info[n][pc][vc].vc_stat]);
    printf("|");
  }

  printf(" @clock:%lld \n",sim_clock);

}

void update_is_ready_flag()
{
  int node, pc, vc, next_node, next_pc, pc_index;
  int narbiter_flit, nxbar_flit, nlink_flit, nnic_flit;
  flit_t *flit_ptr;
  int sum[MAX_PC];

  for(node=0; node<NUM_NODES; node++)
  {
    NUM_PC     = router_info[node].num_pc;
    NUM_NIC_PC = router_info[node].num_nic_pc;
    NUM_VC     = router_info[node].num_vc;
    for(pc=0; pc<NUM_PC; pc++)
    {
      sum[pc] = 0;
      for(vc=0; vc<NUM_VC; vc++)
        sum[pc] += msg_cnt(&router_input_buf[node][pc][vc]); 

      for(vc=0; vc<NUM_NIC_VC; vc++)
        if(pc >= NUM_PC-NUM_NIC_PC)
          is_ready[node][pc][vc].noutbuf = 
            (msg_cnt(&(nic_output_buf[node][pc_index][vc])) <= NIC_OUTPUT_BUF_SIZE - MSG_LEN)? YES:NO;

      for(vc=0; vc<NUM_VC; vc++)
      {
        if(pc >= NUM_PC-NUM_NIC_PC)
        {
          // Check whether there is a flit that are selected in the arbiter and scheduled
          // to be sent to the xbar at the next cycle. If so, we need to take this flit 
          // into consideration as well when we set the value of the is_ready[][][].ninbuf.
          // has_flit[][][] indicates that there is a flit selected in the arbiter stage
          // at current cycle.
          narbiter_flit = (has_flit[node][pc][vc].arbiter == YES)? 1:0;


          pc_index = pc - (NUM_PC-NUM_NIC_PC);
          // 2. ninbuf
          // Now, we have set is_ready[][][].ninbuf so that the arbiter can find out the 
          // status of the NIC input buf. Here, we consider narbiter_flit together as 
          // described above since it can still go to the NIC input buf even if is_ready 
          // is set to NO here.
          is_ready[node][pc][vc].ninbuf = 
            //(msg_cnt(&nic_input_buf[node][pc_index][vc]) == NIC_INPUT_BUF_SIZE - narbiter_flit)? 
            (msg_cnt(&nic_input_buf[node][pc_index][vc]) >= NIC_INPUT_BUF_SIZE)? NO:YES;

          // 3. noutbuf
          // Set the status of the nic output buf. This flag is checked when the application 
          // want to send messages to the NIC. An application send a message (composed of 
          // MSG_LEN flits) at one time. Thus, the buffer should be able to accomodate at least 
          // MSG_LEN flits to set this flag to YES.
          is_ready[node][pc][vc].noutbuf = 
            (msg_cnt(&(nic_output_buf[node][pc_index][vc])) <= NIC_OUTPUT_BUF_SIZE - MSG_LEN)? YES:NO;
        }

        // 4. rinbuf
        // Now, we set is_ready[][][].rinbuf so that the arbiter can find out the status of the
        // router input buf. Here, -nxbar_flit and -nlink_flit indicates the number of flit(s) 
        // that are going to go through the xbar and the link respectively. 
        // Note that they have already passed the arbitration stage and are on their way to the
        // router regardless of the input buffer status.

        if(pc < NUM_PC-NUM_NIC_PC)
        {
          if(router_info[node].type == LOCAL)
            ROUTER_INPUT_BUF_SIZE = ROUTER_INPUT_BUF_SIZE*HYB_VC_RATIO;

          // if N,S,E,W, we need to check previous router xbar and link for ongoing flits.
          // Get the node number of the neighbor connected by the physical channel pc.
          next_node = neighbor[node][pc];

          if(next_node == EDGE)
            // if next node is EDGE, this router input buffer is always not ready.
            // set the credit to 0 meaning no buffer slots are available.
            is_ready[node][pc][vc].rinbuf = 0;
          else
          {
            // Get the PC number of the neighbor connected by the physical channel pc.
            //next_pc = (pc+(NUM_PC-NUM_NIC_PC)/2) % (NUM_PC-NUM_NIC_PC);
            next_pc = neighbor_pc[node][pc];

            // Check the flits that are going to go through the xbar and the link.
            // Currently, assume that link stage takes 1 cycle and thus only one flit can
            // be transferred at the link at a cycle. (Should be modified to accomodate 
            // longer link delay)
            narbiter_flit = (has_flit[next_node][next_pc][vc].arbiter == YES)? 1:0;
            nxbar_flit    = (has_flit[next_node][next_pc][vc].xbar    == YES)? 1:0;


            // reset has_flit.
            has_flit[next_node][next_pc][vc].arbiter = NO;
            has_flit[next_node][next_pc][vc].xbar    = NO;

            // This value indicates the credit (number of available buffer slot) of each VC.
            is_ready[node][pc][vc].rinbuf = ROUTER_INPUT_BUF_SIZE - 
              msg_cnt(&router_input_buf[node][pc][vc]) - narbiter_flit - nxbar_flit;

            is_ready[node][pc][vc].pinbuf = 1;//(NUM_VC*ROUTER_INPUT_BUF_SIZE / num_priority_levels) - sum[pc];
          }

          if(router_info[node].type == LOCAL)
            ROUTER_INPUT_BUF_SIZE = ROUTER_INPUT_BUF_SIZE/HYB_VC_RATIO;
        }
        else
        {
          // Injection Channel.
          pc_index = pc - (NUM_PC-NUM_NIC_PC);
          nnic_flit = (has_flit[node][pc][vc].nic == YES)? 1:0;

          // Only need to check the input buffer of the current router.
          // This value indicates the credit (number of available buffer slot) of each VC.
          is_ready[node][pc][vc].rinbuf = 
            (msg_cnt(&router_input_buf[node][pc][vc]) < ROUTER_INPUT_BUF_SIZE )? YES:NO;
        }

        // This part is included here for the safety issue.
        // This flag is checked in the stage1() and set in the stage2(). Thus, if you  
        // modify the sequence of the function call above such that stage2() is called 
        // before stage1(), the updated value of this flag in stage2() will affect the 
        // checking routine in the stage1(). 
        // To prevent this, set the flag value to the 'arbiter_tmp' instead of the 
        // 'arbiter' itself in stage2(). Then, at the end of each cycle, set the value 
        // of the 'arbiter' to  that of the 'arbiter_tmp' as shown below.
        // 
        // As long as the stage1() is called before the stage2(), following line is not
        // not necessary.
        //
        // is_ready[node][pc][vc].arbiter =  is_ready[node][pc][vc].arbiter_tmp; 

      }// for vc
    }
  }
}

/* Accumulate per-channel link occupancy for the throughput statistics.
 *
 * Does nothing until num_ejt_msg has reached WARMUP_MSG. Afterwards, for
 * each router (NUM_PC / NUM_NIC_PC / NUM_VC are loaded from router_info[]):
 *   - LOCAL routers add the crossbar buffer occupancy of every channel;
 *   - all other routers add the occupancy of the four link buffers of every
 *     non-NIC channel.
 */
void update_link_usage()
{
  if(num_ejt_msg < WARMUP_MSG)
    return;

  // Even if we iterate for every VC in a PC, only one of the VCs can
  // have a flit in the link_buf due to the switch arbitraion.
  for(int n=0; n<NUM_NODES; n++)
  {
    NUM_PC     = router_info[n].num_pc;
    NUM_NIC_PC = router_info[n].num_nic_pc;
    NUM_VC     = router_info[n].num_vc;

    if(router_info[n].type == LOCAL)
    {
      for(int port=0; port<NUM_PC; port++)
        for(int ch=0; ch<NUM_VC; ch++)
          link_usage[n][port] += msg_cnt(&xbar_buf[n][port][ch]);
    }
    else
    {
      for(int port=0; port<NUM_PC-NUM_NIC_PC; port++)
      {
        for(int ch=0; ch<NUM_VC; ch++)
        {
          link_usage[n][port] += msg_cnt(&link_buf[n][port][ch]);
          link_usage[n][port] += msg_cnt(&link_buf2[n][port][ch]);
          link_usage[n][port] += msg_cnt(&link_buf3[n][port][ch]);
          link_usage[n][port] += msg_cnt(&link_buf4[n][port][ch]);
        }
      }
    }
  }
}

// Ordering predicate: true when a's val is strictly smaller than b's.
bool sort_val (sort_t a, sort_t b)
{
  return a.val < b.val;
}
// Ordering predicate: true when a's pos is strictly smaller than b's.
bool sort_pos (sort_t a, sort_t b)
{
  return a.pos < b.pos;
}

/* Print the index, val and pos of the first MAX_PRIORITY_LEVELS entries of
 * priority_array, one per line.
 *
 * The loop is additionally bounded by the vector's actual size, so a table
 * holding fewer than MAX_PRIORITY_LEVELS entries is printed as far as it
 * goes instead of being read out of bounds.
 */
void print_pri_array()
{
  size_t limit = priority_array.size();
  if(limit > static_cast<size_t>(MAX_PRIORITY_LEVELS))
    limit = static_cast<size_t>(MAX_PRIORITY_LEVELS);

  for(size_t n=0; n<limit; n++)
    printf("n:%d val:%.2lf pos:%d\n",static_cast<int>(n), priority_array[n].val, priority_array[n].pos);

}


